# Notebook bootstrap: run the shared settings script (IPython magic).
# NOTE(review): `np` is used later in this notebook but never imported here —
# presumably notebook_settings.py does `import numpy as np`; confirm.
%run "/code/source/notebooks/notebook_settings.py"
import logging
import helpsk as hlp
from helpsk.utility import read_pickle, Timer
from helpsk.sklearn_eval import MLExperimentResults
import source.config.config as config
from source.service.model_registry import ModelRegistry
logging.info("Running experiment notebook for last run.")
2022-09-19 23:47:21 - INFO | Running experiment notebook for last run.
# Connect to the tracking server and look up the experiment's most recent run,
# then log its identifying details and metric(s).
registry = ModelRegistry(tracking_uri=config.experiment_server_url())
experiment = registry.get_experiment_by_name(exp_name=config.experiment_name())
for message in (
    f"Experiment id: {experiment.last_run.exp_id}",
    f"Experiment name: {experiment.last_run.exp_name}",
    f"Run id: {experiment.last_run.run_id}",
    f"Metric(s): {experiment.last_run.metrics}",
):
    logging.info(message)
2022-09-19 23:47:22 - INFO | Experiment id: 1
2022-09-19 23:47:22 - INFO | Experiment name: credit
2022-09-19 23:47:22 - INFO | Run id: 391ab4a486c149d69871ded932297a94
2022-09-19 23:47:22 - INFO | Metric(s): {'roc_auc': 0.762641790401041}
What is the metric/performance from the model associated with the last run?
# Metric(s) recorded for the most recent run.
last_run_metrics = experiment.last_run.metrics
logging.info(f"last run metrics: {last_run_metrics}")
2022-09-19 23:47:22 - INFO | last run metrics: {'roc_auc': 0.762641790401041}
What is the metric/performance of the model in production?
# Look up the run currently promoted to production and report its metric(s).
production_run = registry.get_production_run(model_name=config.model_name())
production_metrics = production_run.metrics
logging.info(f"production run metrics: {production_metrics}")
2022-09-19 23:47:22 - INFO | production run metrics: {'roc_auc': 0.7566610156276207}
# underlying mlflow object (the raw mlflow `Run` entity; shown via notebook repr)
experiment.last_run.mlflow_entity
<Run: data=<RunData: metrics={'roc_auc': 0.762641790401041}, params={'model__criterion': 'entropy',
'model__max_depth': '70',
'model__max_features': '0.1142268477118407',
'model__max_samples': '0.5483119512487002',
'model__min_samples_leaf': '8',
'model__min_samples_split': '12',
'model__n_estimators': '553',
'prep__non_numeric__encoder__transformer': "OneHotEncoder(handle_unknown='ignore')",
'prep__numeric__imputer__transformer': "SimpleImputer(strategy='median')",
'prep__numeric__pca__transformer': "PCA(n_components='mle')",
'prep__numeric__scaler__transformer': 'None'}, tags={'mlflow.log-model.history': '[{"run_id": "391ab4a486c149d69871ded932297a94", '
'"artifact_path": "model", "utc_time_created": '
'"2022-09-19 23:47:07.844329", "flavors": '
'{"python_function": {"model_path": "model.pkl", '
'"loader_module": "mlflow.sklearn", '
'"python_version": "3.9.13", "env": '
'"conda.yaml"}, "sklearn": {"pickled_model": '
'"model.pkl", "sklearn_version": "1.1.1", '
'"serialization_format": "cloudpickle", "code": '
'null}}, "model_uuid": '
'"27a6e22e8e13445593e83a549cfc574f", '
'"mlflow_version": "1.26.1"}]',
'mlflow.note.content': '2022_09_19_23_46_49',
'mlflow.runName': '2022_09_19_23_46_49',
'mlflow.source.git.commit': 'a7c03aa9bddd2116d19ba60183077e150f12f99b',
'mlflow.source.name': 'source/entrypoints/cli.py',
'mlflow.source.type': 'LOCAL',
'mlflow.user': 'root',
'type': 'BayesSearchCV'}>, info=<RunInfo: artifact_uri='./mlflow-artifact-root/1/391ab4a486c149d69871ded932297a94/artifacts', end_time=1663631230789, experiment_id='1', lifecycle_stage='active', run_id='391ab4a486c149d69871ded932297a94', run_uuid='391ab4a486c149d69871ded932297a94', start_time=1663631209415, status='FINISHED', user_id='root'>>
# Pull the pickled train/test splits that were logged as run artifacts.
with Timer("Loading training/test datasets"):
    X_train, X_test, y_train, y_test = (
        experiment.last_run.download_artifact(
            artifact_name=f'{name}.pkl',
            read_from=read_pickle,
        )
        for name in ('x_train', 'x_test', 'y_train', 'y_test')
    )
Timer Started: Loading training/test datasets Timer Finished (0.04 seconds)
# Sanity-check the dimensions of the loaded datasets.
for message in (
    f"training X shape: {X_train.shape}",
    f"training y length: {len(y_train)}",
    f"test X shape: {X_test.shape}",
    f"test y length: {len(y_test)}",
):
    logging.info(message)
2022-09-19 23:47:22 - INFO | training X shape: (800, 20) 2022-09-19 23:47:22 - INFO | training y length: 800 2022-09-19 23:47:22 - INFO | test X shape: (200, 20) 2022-09-19 23:47:22 - INFO | test y length: 200
# Class labels and their counts in the training target (notebook display).
np.unique(y_train, return_counts=True)
(array([0, 1]), array([559, 241]))
# Class balance (proportion of each label) in the training target.
# The original called np.unique(..., return_counts=True) twice for the same
# array; compute the counts once and normalize.
_, train_class_counts = np.unique(y_train, return_counts=True)
train_y_proportion = train_class_counts / np.sum(train_class_counts)
logging.info(f"balance of y in training: {train_y_proportion}")
2022-09-19 23:47:22 - INFO | balance of y in training: [0.69875 0.30125]
# Class balance (proportion of each label) in the test target — same
# computation as for the training set. The original called
# np.unique(..., return_counts=True) twice; compute the counts once.
_, test_class_counts = np.unique(y_test, return_counts=True)
test_y_proportion = test_class_counts / np.sum(test_class_counts)
logging.info(f"balance of y in test: {test_y_proportion}")
2022-09-19 23:47:22 - INFO | balance of y in test: [0.705 0.295]
# Load the serialized hyperparameter-search results logged with the run.
results = experiment.last_run.download_artifact(
    artifact_name='experiment.yaml', read_from=MLExperimentResults.from_yaml_file
)
best_score_message = f"Best Score: {results.best_score}"
best_params_message = f"Best Params: {results.best_params}"
logging.info(best_score_message)
logging.info(best_params_message)
2022-09-19 23:47:22 - INFO | Best Score: 0.762641790401041
2022-09-19 23:47:22 - INFO | Best Params: {'model': 'RandomForestClassifier()', 'max_features': 0.1142268477118407, 'max_depth': 70, 'n_estimators': 553, 'min_samples_split': 12, 'min_samples_leaf': 8, 'max_samples': 0.5483119512487002, 'criterion': 'entropy', 'imputer': "SimpleImputer(strategy='median')", 'scaler': 'None', 'pca': "PCA('mle')", 'encoder': 'OneHotEncoder()'}
# Best trial from each model family: rank trials within each family by mean
# cross-validated roc_auc and keep only the top-ranked trial per family.
df = results.to_formatted_dataframe(return_style=False, include_rank=True)
within_model_rank = df.groupby("model")["roc_auc Mean"].rank(
    method="first", ascending=False
)
df["model_rank"] = within_model_rank
df.query("model_rank == 1")
| rank | roc_auc Mean | roc_auc 95CI.LO | roc_auc 95CI.HI | model | C | max_features | max_depth | n_estimators | min_samples_split | min_samples_leaf | max_samples | criterion | learning_rate | min_child_weight | subsample | colsample_bytree | colsample_bylevel | reg_alpha | reg_lambda | num_leaves | imputer | scaler | pca | encoder | model_rank | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 11 | 1 | 0.76 | 0.71 | 0.81 | RandomForestClassifier() | NaN | 0.11 | 70.00 | 553.00 | 12.00 | 8.00 | 0.55 | entropy | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | SimpleImputer(strategy='median') | None | PCA('mle') | OneHotEncoder() | 1.00 |
| 19 | 2 | 0.76 | 0.72 | 0.80 | XGBClassifier() | NaN | NaN | 1.00 | 896.00 | NaN | NaN | NaN | NaN | 0.03 | 8.00 | 0.80 | 0.91 | 0.83 | 0.00 | 1.41 | NaN | SimpleImputer(strategy='median') | None | None | OneHotEncoder() | 1.00 |
| 0 | 3 | 0.76 | 0.71 | 0.81 | LogisticRegression() | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | SimpleImputer() | StandardScaler() | None | OneHotEncoder() | 1.00 |
| 24 | 4 | 0.76 | 0.73 | 0.79 | LGBMClassifier() | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 0.51 | 0.68 | NaN | 5.68 | 42.57 | 50.00 | SimpleImputer(strategy='median') | None | PCA('mle') | OneHotEncoder() | 1.00 |
| 9 | 6 | 0.75 | 0.70 | 0.81 | ExtraTreesClassifier() | NaN | 0.03 | 84.00 | 1088.00 | 24.00 | 36.00 | 0.98 | gini | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | SimpleImputer() | None | None | OneHotEncoder() | 1.00 |
# All trials (styled output), ranked by mean roc_auc.
results.to_formatted_dataframe(
    return_style=True, include_rank=True, num_rows=500
)
| rank | roc_auc Mean | roc_auc 95CI.LO | roc_auc 95CI.HI | model | C | max_features | max_depth | n_estimators | min_samples_split | min_samples_leaf | max_samples | criterion | learning_rate | min_child_weight | subsample | colsample_bytree | colsample_bylevel | reg_alpha | reg_lambda | num_leaves | imputer | scaler | pca | encoder |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 1 | 0.763 | 0.711 | 0.814 | RandomForestClassifier() | <NA> | 0.114 | 70.000 | 553.000 | 12.000 | 8.000 | 0.548 | entropy | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | SimpleImputer(strategy='median') | None | PCA('mle') | OneHotEncoder() |
| 2 | 0.761 | 0.719 | 0.802 | XGBClassifier() | <NA> | <NA> | 1.000 | 896.000 | <NA> | <NA> | <NA> | <NA> | 0.029 | 8.000 | 0.799 | 0.906 | 0.825 | 0.003 | 1.411 | <NA> | SimpleImputer(strategy='median') | None | None | OneHotEncoder() |
| 3 | 0.759 | 0.713 | 0.805 | LogisticRegression() | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | SimpleImputer() | StandardScaler() | None | OneHotEncoder() |
| 4 | 0.757 | 0.726 | 0.788 | LGBMClassifier() | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | 0.511 | 0.683 | <NA> | 5.684 | 42.574 | 50.000 | SimpleImputer(strategy='median') | None | PCA('mle') | OneHotEncoder() |
| 5 | 0.756 | 0.726 | 0.787 | RandomForestClassifier() | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | SimpleImputer() | None | None | OneHotEncoder() |
| 6 | 0.753 | 0.696 | 0.811 | ExtraTreesClassifier() | <NA> | 0.030 | 84.000 | 1,088.000 | 24.000 | 36.000 | 0.981 | gini | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | SimpleImputer() | None | None | OneHotEncoder() |
| 7 | 0.752 | 0.684 | 0.819 | LogisticRegression() | 0.001 | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | SimpleImputer(strategy='median') | MinMaxScaler() | None | OneHotEncoder() |
| 8 | 0.750 | 0.711 | 0.789 | LGBMClassifier() | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | 0.797 | 0.700 | <NA> | 6.654 | 9.475 | 381.000 | SimpleImputer(strategy='median') | None | None | CustomOrdinalEncoder() |
| 9 | 0.744 | 0.708 | 0.780 | RandomForestClassifier() | <NA> | 0.681 | 38.000 | 1,461.000 | 23.000 | 10.000 | 0.553 | gini | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | SimpleImputer(strategy='median') | None | None | CustomOrdinalEncoder() |
| 10 | 0.743 | 0.690 | 0.795 | ExtraTreesClassifier() | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | SimpleImputer() | None | None | OneHotEncoder() |
| 11 | 0.739 | 0.670 | 0.807 | LogisticRegression() | 23.327 | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | SimpleImputer(strategy='median') | StandardScaler() | None | OneHotEncoder() |
| 12 | 0.738 | 0.705 | 0.772 | LGBMClassifier() | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | SimpleImputer() | None | None | OneHotEncoder() |
| 13 | 0.737 | 0.696 | 0.779 | RandomForestClassifier() | <NA> | 0.710 | 15.000 | 1,493.000 | 33.000 | 27.000 | 0.914 | gini | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | SimpleImputer(strategy='most_frequent') | None | PCA('mle') | OneHotEncoder() |
| 14 | 0.731 | 0.712 | 0.750 | LGBMClassifier() | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | 0.598 | 0.661 | <NA> | 12.533 | 35.084 | 348.000 | SimpleImputer(strategy='most_frequent') | None | None | CustomOrdinalEncoder() |
| 15 | 0.729 | 0.664 | 0.794 | LGBMClassifier() | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | 0.845 | 0.453 | <NA> | 16.166 | 40.978 | 351.000 | SimpleImputer(strategy='median') | None | PCA('mle') | OneHotEncoder() |
| 16 | 0.728 | 0.703 | 0.752 | XGBClassifier() | <NA> | <NA> | 15.000 | 1,159.000 | <NA> | <NA> | <NA> | <NA> | 0.032 | 29.000 | 0.834 | 0.520 | 0.503 | 0.003 | 1.839 | <NA> | SimpleImputer(strategy='median') | None | PCA('mle') | CustomOrdinalEncoder() |
| 17 | 0.726 | 0.689 | 0.762 | LogisticRegression() | 0.000 | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | SimpleImputer(strategy='median') | StandardScaler() | None | CustomOrdinalEncoder() |
| 18 | 0.726 | 0.682 | 0.770 | XGBClassifier() | <NA> | <NA> | 5.000 | 1,218.000 | <NA> | <NA> | <NA> | <NA> | 0.115 | 2.000 | 0.545 | 0.648 | 0.852 | 0.123 | 1.165 | <NA> | SimpleImputer(strategy='median') | None | PCA('mle') | CustomOrdinalEncoder() |
| 19 | 0.725 | 0.699 | 0.752 | RandomForestClassifier() | <NA> | 0.740 | 14.000 | 1,645.000 | 5.000 | 43.000 | 0.741 | entropy | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | SimpleImputer(strategy='most_frequent') | None | PCA('mle') | CustomOrdinalEncoder() |
| 20 | 0.723 | 0.666 | 0.779 | ExtraTreesClassifier() | <NA> | 0.857 | 30.000 | 879.000 | 17.000 | 28.000 | 0.563 | entropy | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | SimpleImputer(strategy='median') | None | None | CustomOrdinalEncoder() |
| 21 | 0.722 | 0.655 | 0.790 | ExtraTreesClassifier() | <NA> | 0.672 | 81.000 | 1,136.000 | 34.000 | 34.000 | 0.971 | gini | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | SimpleImputer(strategy='median') | None | PCA('mle') | CustomOrdinalEncoder() |
| 22 | 0.722 | 0.684 | 0.760 | LogisticRegression() | 0.000 | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | SimpleImputer(strategy='median') | StandardScaler() | PCA('mle') | CustomOrdinalEncoder() |
| 23 | 0.722 | 0.658 | 0.786 | ExtraTreesClassifier() | <NA> | 0.781 | 50.000 | 590.000 | 35.000 | 47.000 | 0.846 | gini | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | SimpleImputer(strategy='median') | None | PCA('mle') | OneHotEncoder() |
| 24 | 0.721 | 0.673 | 0.768 | XGBClassifier() | <NA> | <NA> | 3.000 | 682.000 | <NA> | <NA> | <NA> | <NA> | 0.152 | 2.000 | 0.698 | 0.940 | 0.817 | 0.009 | 2.086 | <NA> | SimpleImputer() | None | PCA('mle') | CustomOrdinalEncoder() |
| 25 | 0.718 | 0.695 | 0.740 | XGBClassifier() | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | SimpleImputer() | None | None | OneHotEncoder() |
# Trials restricted to the random-forest family.
results.to_formatted_dataframe(
    query='model == "RandomForestClassifier()"',
    include_rank=True,
)
| rank | roc_auc Mean | roc_auc 95CI.LO | roc_auc 95CI.HI | max_features | max_depth | n_estimators | min_samples_split | min_samples_leaf | max_samples | criterion | imputer | pca | encoder |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 1 | 0.763 | 0.711 | 0.814 | 0.114 | 70.000 | 553.000 | 12.000 | 8.000 | 0.548 | entropy | SimpleImputer(strategy='median') | PCA('mle') | OneHotEncoder() |
| 2 | 0.756 | 0.726 | 0.787 | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | SimpleImputer() | None | OneHotEncoder() |
| 3 | 0.744 | 0.708 | 0.780 | 0.681 | 38.000 | 1,461.000 | 23.000 | 10.000 | 0.553 | gini | SimpleImputer(strategy='median') | None | CustomOrdinalEncoder() |
| 4 | 0.737 | 0.696 | 0.779 | 0.710 | 15.000 | 1,493.000 | 33.000 | 27.000 | 0.914 | gini | SimpleImputer(strategy='most_frequent') | PCA('mle') | OneHotEncoder() |
| 5 | 0.725 | 0.699 | 0.752 | 0.740 | 14.000 | 1,645.000 | 5.000 | 43.000 | 0.741 | entropy | SimpleImputer(strategy='most_frequent') | PCA('mle') | CustomOrdinalEncoder() |
# Trials restricted to the logistic-regression family.
results.to_formatted_dataframe(
    query='model == "LogisticRegression()"',
    include_rank=True,
)
| rank | roc_auc Mean | roc_auc 95CI.LO | roc_auc 95CI.HI | C | imputer | scaler | pca | encoder |
|---|---|---|---|---|---|---|---|---|
| 1 | 0.759 | 0.713 | 0.805 | <NA> | SimpleImputer() | StandardScaler() | None | OneHotEncoder() |
| 2 | 0.752 | 0.684 | 0.819 | 0.001 | SimpleImputer(strategy='median') | MinMaxScaler() | None | OneHotEncoder() |
| 3 | 0.739 | 0.670 | 0.807 | 23.327 | SimpleImputer(strategy='median') | StandardScaler() | None | OneHotEncoder() |
| 4 | 0.726 | 0.689 | 0.762 | 0.000 | SimpleImputer(strategy='median') | StandardScaler() | None | CustomOrdinalEncoder() |
| 5 | 0.722 | 0.684 | 0.760 | 0.000 | SimpleImputer(strategy='median') | StandardScaler() | PCA('mle') | CustomOrdinalEncoder() |
# Search diagnostics: how performance and sampled parameter values evolved
# across the Bayesian-search trials, overall and for the random-forest family.
results.plot_performance_across_trials(facet_by='model').show()
results.plot_performance_across_trials(query='model == "RandomForestClassifier()"').show()
results.plot_parameter_values_across_trials(query='model == "RandomForestClassifier()"').show()
# Optional, heavier diagnostic plot — kept commented for ad-hoc use:
# results.plot_scatter_matrix(query='model == "RandomForestClassifier()"',
# height=1000, width=1000).show()
# Score vs. each numeric hyperparameter for the random-forest family.
results.plot_performance_numeric_params(query='model == "RandomForestClassifier()"',
height=800)
results.plot_parallel_coordinates(query='model == "RandomForestClassifier()"').show()
results.plot_performance_non_numeric_params(query='model == "RandomForestClassifier()"').show()
# Score vs. max_features, with max_depth as point size and encoder as color.
results.plot_score_vs_parameter(
    query='model == "RandomForestClassifier()"',
    parameter='max_features',
    size='max_depth',
    color='encoder',
)
# Optional parameter-vs-parameter views for the XGBoost family — kept
# commented for ad-hoc use:
# results.plot_parameter_vs_parameter(
# query='model == "XGBClassifier()"',
# parameter_x='colsample_bytree',
# parameter_y='learning_rate',
# size='max_depth'
# )
# results.plot_parameter_vs_parameter(
# query='model == "XGBClassifier()"',
# parameter_x='colsample_bytree',
# parameter_y='learning_rate',
# size='imputer'
# )
# Fetch the fitted model pipeline that was logged with the most recent run.
last_model = experiment.last_run.download_artifact(
    artifact_name='model/model.pkl', read_from=read_pickle
)
print(type(last_model.model))
<class 'sklearn.pipeline.Pipeline'>
# Display the wrapped model pipeline (notebook repr).
last_model
SklearnModelWrapper(model=Pipeline(steps=[('prep',
ColumnTransformer(transformers=[('numeric',
Pipeline(steps=[('imputer',
TransformerChooser(transformer=SimpleImputer(strategy='median'))),
('scaler',
TransformerChooser()),
('pca',
TransformerChooser(transformer=PCA(n_components='mle')))]),
['duration',
'credit_amount',
'installment_commitment',
'residence_si...
'savings_status',
'employment',
'personal_status',
'other_parties',
'property_magnitude',
'other_payment_plans',
'housing',
'job',
'own_telephone',
'foreign_worker'])])),
('model',
RandomForestClassifier(criterion='entropy',
max_depth=70,
max_features=0.1142268477118407,
max_samples=0.5483119512487002,
min_samples_leaf=8,
min_samples_split=12,
n_estimators=553,
random_state=42))]))In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. SklearnModelWrapper(model=Pipeline(steps=[('prep',
ColumnTransformer(transformers=[('numeric',
Pipeline(steps=[('imputer',
TransformerChooser(transformer=SimpleImputer(strategy='median'))),
('scaler',
TransformerChooser()),
('pca',
TransformerChooser(transformer=PCA(n_components='mle')))]),
['duration',
'credit_amount',
'installment_commitment',
'residence_si...
'savings_status',
'employment',
'personal_status',
'other_parties',
'property_magnitude',
'other_payment_plans',
'housing',
'job',
'own_telephone',
'foreign_worker'])])),
('model',
RandomForestClassifier(criterion='entropy',
max_depth=70,
max_features=0.1142268477118407,
max_samples=0.5483119512487002,
min_samples_leaf=8,
min_samples_split=12,
n_estimators=553,
random_state=42))]))Pipeline(steps=[('prep',
ColumnTransformer(transformers=[('numeric',
Pipeline(steps=[('imputer',
TransformerChooser(transformer=SimpleImputer(strategy='median'))),
('scaler',
TransformerChooser()),
('pca',
TransformerChooser(transformer=PCA(n_components='mle')))]),
['duration', 'credit_amount',
'installment_commitment',
'residence_since', 'age',
'existing_credi...
'savings_status',
'employment',
'personal_status',
'other_parties',
'property_magnitude',
'other_payment_plans',
'housing', 'job',
'own_telephone',
'foreign_worker'])])),
('model',
RandomForestClassifier(criterion='entropy', max_depth=70,
max_features=0.1142268477118407,
max_samples=0.5483119512487002,
min_samples_leaf=8,
min_samples_split=12, n_estimators=553,
random_state=42))])ColumnTransformer(transformers=[('numeric',
Pipeline(steps=[('imputer',
TransformerChooser(transformer=SimpleImputer(strategy='median'))),
('scaler',
TransformerChooser()),
('pca',
TransformerChooser(transformer=PCA(n_components='mle')))]),
['duration', 'credit_amount',
'installment_commitment', 'residence_since',
'age', 'existing_credits',
'num_dependents']),
('non_numeric',
Pipeline(steps=[('encoder',
TransformerChooser(transformer=OneHotEncoder(handle_unknown='ignore')))]),
['checking_status', 'credit_history',
'purpose', 'savings_status', 'employment',
'personal_status', 'other_parties',
'property_magnitude', 'other_payment_plans',
'housing', 'job', 'own_telephone',
'foreign_worker'])])['duration', 'credit_amount', 'installment_commitment', 'residence_since', 'age', 'existing_credits', 'num_dependents']
TransformerChooser(transformer=SimpleImputer(strategy='median'))
SimpleImputer(strategy='median')
SimpleImputer(strategy='median')
TransformerChooser()
TransformerChooser(transformer=PCA(n_components='mle'))
PCA(n_components='mle')
PCA(n_components='mle')
['checking_status', 'credit_history', 'purpose', 'savings_status', 'employment', 'personal_status', 'other_parties', 'property_magnitude', 'other_payment_plans', 'housing', 'job', 'own_telephone', 'foreign_worker']
TransformerChooser(transformer=OneHotEncoder(handle_unknown='ignore'))
OneHotEncoder(handle_unknown='ignore')
OneHotEncoder(handle_unknown='ignore')
RandomForestClassifier(criterion='entropy', max_depth=70,
max_features=0.1142268477118407,
max_samples=0.5483119512487002, min_samples_leaf=8,
min_samples_split=12, n_estimators=553, random_state=42)
test_predictions = last_model.predict(X_test)
# Peek at the first few predicted scores.
test_predictions[:10]
array([0.3733136 , 0.4118129 , 0.48404516, 0.35906406, 0.17344127,
0.33008237, 0.17795812, 0.40549351, 0.21201897, 0.24426042])
# Evaluate the last run's model on the holdout set at a 0.37 score threshold.
evaluator = hlp.sklearn_eval.TwoClassEvaluator(
    actual_values=y_test,
    predicted_scores=test_predictions,
    score_threshold=0.37,
)
evaluator.plot_actual_vs_predict_histogram()
evaluator.plot_confusion_matrix()
evaluator.all_metrics_df(
    return_style=True,
    dummy_classifier_strategy=['prior', 'constant'],
    round_by=3,
)
| Score | Dummy (prior) | Dummy (constant) | Explanation | |
|---|---|---|---|---|
| AUC | 0.794 | 0.500 | 0.500 | Area under the ROC curve (true pos. rate vs false pos. rate); ranges from 0.5 (purely random classifier) to 1.0 (perfect classifier) |
| True Positive Rate | 0.593 | 0.000 | 1.000 | 59.3% of positive instances were correctly identified.; i.e. 35 "Positive Class" labels were correctly identified out of 59 instances; a.k.a Sensitivity/Recall |
| True Negative Rate | 0.837 | 1.000 | 0.000 | 83.7% of negative instances were correctly identified.; i.e. 118 "Negative Class" labels were correctly identified out of 141 instances |
| False Positive Rate | 0.163 | 0.000 | 1.000 | 16.3% of negative instances were incorrectly identified as positive; i.e. 23 "Negative Class" labels were incorrectly identified as "Positive Class", out of 141 instances |
| False Negative Rate | 0.407 | 1.000 | 0.000 | 40.7% of positive instances were incorrectly identified as negative; i.e. 24 "Positive Class" labels were incorrectly identified as "Negative Class", out of 59 instances |
| Positive Predictive Value | 0.603 | 0.000 | 0.295 | When the model claims an instance is positive, it is correct 60.3% of the time; i.e. out of the 58 times the model predicted "Positive Class", it was correct 35 times; a.k.a precision |
| Negative Predictive Value | 0.831 | 0.705 | 0.000 | When the model claims an instance is negative, it is correct 83.1% of the time; i.e. out of the 142 times the model predicted "Negative Class", it was correct 118 times |
| F1 Score | 0.598 | 0.000 | 0.456 | The F1 score can be interpreted as a weighted average of the precision and recall, where an F1 score reaches its best value at 1 and worst score at 0. |
| Precision/Recall AUC | 0.642 | 0.295 | 0.295 | Precision/Recall AUC is calculated with `average_precision` which summarizes a precision-recall curve as the weighted mean of precisions achieved at each threshold. See sci-kit learn documentation for caveats. |
| Accuracy | 0.765 | 0.705 | 0.295 | 76.5% of instances were correctly identified |
| Error Rate | 0.235 | 0.295 | 0.705 | 23.5% of instances were incorrectly identified |
| % Positive | 0.295 | 0.295 | 0.295 | 29.5% of the data are positive; i.e. out of 200 total observations; 59 are labeled as "Positive Class" |
| Total Observations | 200 | 200 | 200 | There are 200 total observations; i.e. sample size |
# Threshold/curve diagnostics and lift/gain table for the last run's model.
roc_figure = evaluator.plot_roc_auc_curve()
roc_figure.show()
pr_auc_figure = evaluator.plot_precision_recall_auc_curve()
pr_auc_figure.show()
threshold_figure = evaluator.plot_threshold_curves(score_threshold_range=(0.1, 0.7))
threshold_figure.show()
tradeoff_figure = evaluator.plot_precision_recall_tradeoff(score_threshold_range=(0.1, 0.6))
tradeoff_figure.show()
evaluator.calculate_lift_gain(return_style=True)
| Gain | Lift | |
|---|---|---|
| Percentile | ||
| 5 | 0.17 | 3.39 |
| 10 | 0.25 | 2.54 |
| 15 | 0.32 | 2.15 |
| 20 | 0.42 | 2.12 |
| 25 | 0.53 | 2.10 |
| 30 | 0.61 | 2.03 |
| 35 | 0.68 | 1.94 |
| 40 | 0.75 | 1.86 |
| 45 | 0.76 | 1.69 |
| 50 | 0.80 | 1.59 |
| 55 | 0.80 | 1.45 |
| 60 | 0.88 | 1.47 |
| 65 | 0.88 | 1.36 |
| 70 | 0.92 | 1.31 |
| 75 | 0.93 | 1.24 |
| 80 | 0.97 | 1.21 |
| 85 | 0.98 | 1.16 |
| 90 | 1.00 | 1.11 |
| 95 | 1.00 | 1.05 |
| 100 | 1.00 | 1.00 |
# Fetch the production model's fitted pipeline from its registered run.
production_model = production_run.download_artifact(
    artifact_name='model/model.pkl', read_from=read_pickle
)
print(type(production_model.model))
<class 'sklearn.pipeline.Pipeline'>
# Display the production model pipeline (notebook repr).
production_model
SklearnModelWrapper(model=Pipeline(steps=[('prep',
ColumnTransformer(transformers=[('numeric',
Pipeline(steps=[('imputer',
TransformerChooser(transformer=SimpleImputer())),
('scaler',
TransformerChooser()),
('pca',
TransformerChooser())]),
['duration',
'credit_amount',
'installment_commitment',
'residence_since',
'age',
'existing_credits',
'num_dependents']),
('n...
TransformerChooser(transformer=OneHotEncoder(handle_unknown='ignore')))]),
['checking_status',
'credit_history',
'purpose',
'savings_status',
'employment',
'personal_status',
'other_parties',
'property_magnitude',
'other_payment_plans',
'housing',
'job',
'own_telephone',
'foreign_worker'])])),
('model',
RandomForestClassifier(n_estimators=500,
random_state=42))]))In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. SklearnModelWrapper(model=Pipeline(steps=[('prep',
ColumnTransformer(transformers=[('numeric',
Pipeline(steps=[('imputer',
TransformerChooser(transformer=SimpleImputer())),
('scaler',
TransformerChooser()),
('pca',
TransformerChooser())]),
['duration',
'credit_amount',
'installment_commitment',
'residence_since',
'age',
'existing_credits',
'num_dependents']),
('n...
TransformerChooser(transformer=OneHotEncoder(handle_unknown='ignore')))]),
['checking_status',
'credit_history',
'purpose',
'savings_status',
'employment',
'personal_status',
'other_parties',
'property_magnitude',
'other_payment_plans',
'housing',
'job',
'own_telephone',
'foreign_worker'])])),
('model',
RandomForestClassifier(n_estimators=500,
random_state=42))]))Pipeline(steps=[('prep',
ColumnTransformer(transformers=[('numeric',
Pipeline(steps=[('imputer',
TransformerChooser(transformer=SimpleImputer())),
('scaler',
TransformerChooser()),
('pca',
TransformerChooser())]),
['duration', 'credit_amount',
'installment_commitment',
'residence_since', 'age',
'existing_credits',
'num_dependents']),
('non_numeric',
Pipeline(steps...,
TransformerChooser(transformer=OneHotEncoder(handle_unknown='ignore')))]),
['checking_status',
'credit_history', 'purpose',
'savings_status',
'employment',
'personal_status',
'other_parties',
'property_magnitude',
'other_payment_plans',
'housing', 'job',
'own_telephone',
'foreign_worker'])])),
('model',
RandomForestClassifier(n_estimators=500, random_state=42))])ColumnTransformer(transformers=[('numeric',
Pipeline(steps=[('imputer',
TransformerChooser(transformer=SimpleImputer())),
('scaler',
TransformerChooser()),
('pca',
TransformerChooser())]),
['duration', 'credit_amount',
'installment_commitment', 'residence_since',
'age', 'existing_credits',
'num_dependents']),
('non_numeric',
Pipeline(steps=[('encoder',
TransformerChooser(transformer=OneHotEncoder(handle_unknown='ignore')))]),
['checking_status', 'credit_history',
'purpose', 'savings_status', 'employment',
'personal_status', 'other_parties',
'property_magnitude', 'other_payment_plans',
'housing', 'job', 'own_telephone',
'foreign_worker'])])['duration', 'credit_amount', 'installment_commitment', 'residence_since', 'age', 'existing_credits', 'num_dependents']
TransformerChooser(transformer=SimpleImputer())
SimpleImputer()
SimpleImputer()
TransformerChooser()
TransformerChooser()
['checking_status', 'credit_history', 'purpose', 'savings_status', 'employment', 'personal_status', 'other_parties', 'property_magnitude', 'other_payment_plans', 'housing', 'job', 'own_telephone', 'foreign_worker']
TransformerChooser(transformer=OneHotEncoder(handle_unknown='ignore'))
OneHotEncoder(handle_unknown='ignore')
OneHotEncoder(handle_unknown='ignore')
RandomForestClassifier(n_estimators=500, random_state=42)
# Score the holdout set with the production model; peek at the first scores.
test_predictions = production_model.predict(X_test)
test_predictions[:10]
array([0.388, 0.506, 0.724, 0.368, 0.056, 0.472, 0.076, 0.47 , 0.18 ,
0.23 ])
# Evaluate the production model on the same holdout set and the same 0.37
# threshold so its metrics are directly comparable to the last run's model.
evaluator = hlp.sklearn_eval.TwoClassEvaluator(
    actual_values=y_test,
    predicted_scores=test_predictions,
    score_threshold=0.37,
)
evaluator.plot_actual_vs_predict_histogram()
evaluator.plot_confusion_matrix()
evaluator.all_metrics_df(
    return_style=True,
    dummy_classifier_strategy=['prior', 'constant'],
    round_by=3,
)
| Score | Dummy (prior) | Dummy (constant) | Explanation | |
|---|---|---|---|---|
| AUC | 0.823 | 0.500 | 0.500 | Area under the ROC curve (true pos. rate vs false pos. rate); ranges from 0.5 (purely random classifier) to 1.0 (perfect classifier) |
| True Positive Rate | 0.746 | 0.000 | 1.000 | 74.6% of positive instances were correctly identified.; i.e. 44 "Positive Class" labels were correctly identified out of 59 instances; a.k.a Sensitivity/Recall |
| True Negative Rate | 0.801 | 1.000 | 0.000 | 80.1% of negative instances were correctly identified.; i.e. 113 "Negative Class" labels were correctly identified out of 141 instances |
| False Positive Rate | 0.199 | 0.000 | 1.000 | 19.9% of negative instances were incorrectly identified as positive; i.e. 28 "Negative Class" labels were incorrectly identified as "Positive Class", out of 141 instances |
| False Negative Rate | 0.254 | 1.000 | 0.000 | 25.4% of positive instances were incorrectly identified as negative; i.e. 15 "Positive Class" labels were incorrectly identified as "Negative Class", out of 59 instances |
| Positive Predictive Value | 0.611 | 0.000 | 0.295 | When the model claims an instance is positive, it is correct 61.1% of the time; i.e. out of the 72 times the model predicted "Positive Class", it was correct 44 times; a.k.a precision |
| Negative Predictive Value | 0.883 | 0.705 | 0.000 | When the model claims an instance is negative, it is correct 88.3% of the time; i.e. out of the 128 times the model predicted "Negative Class", it was correct 113 times |
| F1 Score | 0.672 | 0.000 | 0.456 | The F1 score can be interpreted as a weighted average of the precision and recall, where an F1 score reaches its best value at 1 and worst score at 0. |
| Precision/Recall AUC | 0.662 | 0.295 | 0.295 | Precision/Recall AUC is calculated with `average_precision` which summarizes a precision-recall curve as the weighted mean of precisions achieved at each threshold. See sci-kit learn documentation for caveats. |
| Accuracy | 0.785 | 0.705 | 0.295 | 78.5% of instances were correctly identified |
| Error Rate | 0.215 | 0.295 | 0.705 | 21.5% of instances were incorrectly identified |
| % Positive | 0.295 | 0.295 | 0.295 | 29.5% of the data are positive; i.e. out of 200 total observations; 59 are labeled as "Positive Class" |
| Total Observations | 200 | 200 | 200 | There are 200 total observations; i.e. sample size |
# Threshold/curve diagnostics and lift/gain table for the production model.
roc_figure = evaluator.plot_roc_auc_curve()
roc_figure.show()
pr_auc_figure = evaluator.plot_precision_recall_auc_curve()
pr_auc_figure.show()
threshold_figure = evaluator.plot_threshold_curves(score_threshold_range=(0.1, 0.7))
threshold_figure.show()
tradeoff_figure = evaluator.plot_precision_recall_tradeoff(score_threshold_range=(0.1, 0.6))
tradeoff_figure.show()
evaluator.calculate_lift_gain(return_style=True)
| Gain | Lift | |
|---|---|---|
| Percentile | ||
| 5 | 0.14 | 2.71 |
| 10 | 0.24 | 2.37 |
| 15 | 0.37 | 2.49 |
| 20 | 0.49 | 2.46 |
| 25 | 0.54 | 2.17 |
| 30 | 0.66 | 2.20 |
| 35 | 0.71 | 2.03 |
| 40 | 0.75 | 1.86 |
| 45 | 0.80 | 1.77 |
| 50 | 0.83 | 1.66 |
| 55 | 0.85 | 1.54 |
| 60 | 0.86 | 1.44 |
| 65 | 0.90 | 1.38 |
| 70 | 0.93 | 1.33 |
| 75 | 0.95 | 1.27 |
| 80 | 0.97 | 1.21 |
| 85 | 0.98 | 1.16 |
| 90 | 1.00 | 1.11 |
| 95 | 1.00 | 1.05 |
| 100 | 1.00 | 1.00 |